# Changeable: site:KH or HC?,dataInput? site <- "HC" # HC, KH, KH&HC|HC&KH dataInput <-"C1D1" # 6 cases: C1D1, C2D1, C3D1 setwd("~/Traineeship/Dengue/slides_June_2018/PMA_batch2To7_July2019/pop data/models/FOI/Constant") ##Results from different IS and infecting serotype infering models: if ( dataInput == "C1D1"){ data <- read.csv("~/Traineeship/Dengue/slides_June_2018/PMA_batch2To7_July2019/pop data/models/Infecting serotype Model/PredictedIS_B23_567_pred.serotype_C1D1.csv") } if ( dataInput == "C2D1"){ data <- read.csv("~/Traineeship/Dengue/slides_June_2018/PMA_batch2To7_July2019/pop data/models/Infecting serotype Model/PredictedIS_B23_567_pred.serotype_C2D1.csv") } if ( dataInput == "C3D1"){ data <- read.csv("~/Traineeship/Dengue/slides_June_2018/PMA_batch2To7_July2019/pop data/models/Infecting serotype Model/PredictedIS_B23_567_pred.serotype_C3D1.csv") } if ( dataInput == "C1F1"){ data <- read.csv("~/Traineeship/Dengue/slides_June_2018/PMA_batch2To7_July2019/pop data/models/Infecting serotype Model/PredictedIS_B23_567_pred.serotype_HomoAssigned_C1F1.csv") } if ( dataInput == "C2F1"){ data <- read.csv("~/Traineeship/Dengue/slides_June_2018/PMA_batch2To7_July2019/pop data/models/Infecting serotype Model/PredictedIS_B23_567_pred.serotype_HomoAssigned_C2F1.csv") } if ( dataInput == "C3F1"){ data <- read.csv("~/Traineeship/Dengue/slides_June_2018/PMA_batch2To7_July2019/pop data/models/Infecting serotype Model/PredictedIS_B23_567_pred.serotype_HomoAssigned_C3F1.csv") } data$YEAR <- as.factor(data$YEAR) data$predictedIS<- ordered(data$predictedIS,levels=c("Secondary","Primary","Neg")) data$pred.serotype<- as.factor(data$pred.serotype) pop<- data[which(!is.na(data$AGE_MIN)&is.na(data$PanbioUnit)),]# cross out acute and ELISA samples pop$Site <- substr(pop$sampleID,1,2) #Agegroup_ 1year pop$AgeGroup <-factor(as.factor(floor(pop$Age)),labels=c("[1-2)","[2-3)","[3-4)","[4-5)","[5-6)","[6-7)","[7-8)","[8-9)","[9-10)","[10-11)","[11-12)","[12-13)","[13-14)","[14-15)","[15-16)","[16-17)","[17-18)","[18-19)","[19-20)","[20-21)","[21-22)","[22-23)","[23-24)","[24-25)","[25-26)","[26-27)","[27-28)","[28-29)","[29-30)","[30,31)"))# group as 1year 1 group. #>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> ## Agegroup_ 2 years : 16groups # pop$AgeGroup <-factor(as.factor(floor(pop$Age/2)+1),labels=c("[1-2)","[2-4)","[4-6)","[6-8)","[8-10)","[10-12)","[12-14)","[14-16)","[16-18)","[18-20)","[20-22)","[22-24)","[24-26)","[26-28)","[28-30)","[30-31)")) #>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> ##Agegroup_3years : 11 groups # pop$AgeGroup <- floor(pop$Age/3) + 1 # pop$AgeGroup[which(pop$AgeGroup == 11)] <- 10 # pop$AgeGroup <- factor( # pop$AgeGroup, # labels = c("[1-3)","[3-6)","[6-9)","[9-12)","[12-15)","[15-18)", # "[18-21)","[21-24)","[24-27)","[27-31)"))
For each age: numbers of samples: number of neg samples: numbers of samples classified as primary: number of samples classified as secondary:
library(dplyr) library(tidyr) # spread(x,y): spread rows of col x into collums, then value of col y will fit in as rows. #rename(new_name=old_name). if (site=="HC"){ p_year=dplyr::filter(pop,Site=="HC")%>%group_by(AgeGroup,YEAR,Site)%>%dplyr::count(predictedIS)%>%spread(predictedIS,n)} if(site == "KH"){ p_year=dplyr::filter(pop,Site=="KH")%>%group_by(AgeGroup,YEAR,Site)%>%dplyr::count(predictedIS)%>%spread(predictedIS,n)} if(site == "KH&HC"|site == "HC&KH"){ p_year=dplyr::filter(pop)%>%group_by(AgeGroup,YEAR,Site)%>%dplyr::count(predictedIS)%>%spread(predictedIS,n)} # p_year$total <- apply(p_year[,3:8],1,sum,na.rm=T) p_year$total <- apply(p_year[,4:6],1,sum,na.rm=T) p_year <- p_year[order(p_year$YEAR),] #create function for substr age rather than put age in order, this is useful in case of missing data in a specific age: my_substr <- function(s){ if (nchar(s) == 5){ t <- substr(s, 4, 4) }else{ if (nchar(s) == 6){ t <- substr(s, 4, 5) }else{ t <- substr(s, 5, 6) } } return(t) } # p_year$age<- apply(p_year[,1],1,my_substr) # Assign NA as 0 because stan does not support NA value: for (idx in 4:6){ idx.na <- which(is.na(p_year[[idx]])) p_year[[idx]][idx.na] <- 0 } # setnames(p_year, old=c("AgeGroup","1","2","3","4","Neg","Secondary","total"),new=c("AgeGroup","DV1.13","DV2.13","DV3.13","DV4.13","Neg.13","Sec.13","total.13"))
For each year in the past up to age a:FOI, in the following code this assumed to be equal across serotypes and the same for all years.
This gets the data in the right format for stan.
b = length(p_year$age) # number of years: HC: 34 yrs, KH: 35 yrs n_y <- 2017-2013+max(as.integer(p_year[which(p_year$YEAR==2013),]$age)) l_vecj = length(c(rep(1:round(n_y/5), each = 5))) #changes made 5/08/2020 Maxine -- datapp<- list(a = b, datap=c(p_year[1:b,6], p_year[1:b,5], p_year[1:b,4]), veca = as.integer(p_year$age), vectotal = as.integer(p_year$total)) #vector of total data points per age group # ---- library(rstan) # stanc("~/Traineeship/Dengue/slides_June_2018/PMA_batch2To7_July2019/pop data/models/FOI/Constant/Dengue_FOI_constant.stan") FOI_Dengue <-"~/Traineeship/Dengue/slides_June_2018/PMA_batch2To7_July2019/pop data/models/FOI/Constant/Dengue_FOI_constant_Maxine.stan"
Stan does not support NA in data? => Assign NA as 0
Some explanation
Rhat: < 1.05: model perform well n-eff ratio = n-eff/(0.5 * iter * chains) <1 Iterations: 1/2 Warmup : noise data=> exclude?!, 1/2 Sampling
Posterior distribution (likelihood function= model and prior distribution: e.g: lamda[0,1] ).
# you then run using this: From this output we can quickly assess model convergence by looking at the Rhat values for each parameter. When these are at or near 1, the chains have converged. There are many other diagnostics, but this is an important one for Stan. library(loo) fit<-stan(file = FOI_Dengue, data=datapp, iter=6000, chains=4, cores = parallel::detectCores()) # this may be conducted via 2 steps: stand_model() and sampling(). # save data for later use without running code again saveRDS(fit,paste("FOI constant",p_year$Site[1],dataInput,".rds")) # fit <- readRDS('/home/phuonght/Dropbox/1.SHARED_FOLDER/slides_June_2018/PMA_batch2To7_July2019/pop data/models/FOI/Constant/FOI constant KH C1D1 .rds') posterior<-rstan::extract(fit) # specify package rstan rather tham tidyr str(posterior) #this summarizes the parameters, gets mean,etc. of parameters and metrics of whether converged or not like Rhat and neff foisummary<-summary(fit) foisummary tab <- data.frame(foisummary$summary) loglike = extract_log_lik(fit, parameter_name = "log_like", merge_chains = TRUE) #write.csv(tab, paste("FOI constant",p_year$Site[1],dataInput,".csv"))
#plot the posterior distribution of lambda #create dataframe for violin plot: p <- data.frame(lambda = posterior$lambda) p$year <- rep("FOI") # violin plot lamb <- ggplot(p, aes(x=year,y=lambda)) + geom_violin()+ ggtitle(paste("posterior lambda _ constant",p_year$Site[1],dataInput))+ coord_cartesian(ylim=c (0.0,0.03))+ theme(plot.title = element_text(hjust=0.5,face="bold",size=20,colour = "blue"), axis.title.x = element_blank(), axis.text = element_text(face = "bold", angle = 0, hjust = 1))+ # stat_summary(fun.y=mean, geom="point", size=2, color="red")+ # add mean # geom_boxplot(width=0.1)+# add median and quartile stat_summary(fun.data=mean_sdl, geom="pointrange", color="red")#Add mean and standard deviation # save the figure #ggsave(filename=paste("FOI_constant",site,dataInput,".png"), plot = lamb)
site <- "HC" # HC, KH, KH&HC|HC&KH dataInput <-"C1D1" # 6 cases: C1D1, C2D1, C3D1 fit <- readRDS('~/Traineeship/Dengue/slides_June_2018/PMA_batch2To7_July2019/pop data/models/FOI/Constant/FOI constant HC C1D1 .rds') posterior<-rstan::extract(fit) p <- data.frame(lambda = posterior$lambda) p$year <- rep("FOI") p6<-ggplot(p, aes(x=year,y=lambda)) + geom_violin()+ ggtitle(paste(site,dataInput))+ coord_cartesian(ylim=c (0.0,0.03))+ theme(plot.title = element_text(hjust=0.5,face="bold",size=12,colour = "blue"), axis.title.y = element_blank(), axis.title.x = element_blank(), axis.text.x = element_blank(), axis.text = element_text(face = "bold", angle = 0, hjust = 1))+ # stat_summary(fun.y=mean, geom="point", size=2, color="red")+ # add mean # geom_boxplot(width=0.1)+# add median and quartile stat_summary(fun.data=mean_sdl, geom="pointrange", color="red")#Add mean and standard deviation library(gridExtra) library(grid) lamb<- grid.arrange(p1,p2,p3,p4,p5,p6,nrow=2,ncol=3,bottom=textGrob("Posterior lambda distribution_constant",gp=gpar(fontsize=20,font=8,col="dark blue"))) # ggsave(filename ="Posterior lambda distribution_constant.png",plot=lamb )
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.